Comparison of the BindingSiteSet.txt file between the last releases.
| Release version | Date |
|---|---|
| 10.6 | 2019 July |
| 10.6.3 | - |
| 10.7 | 2020 April |
| 10.8 | 2020 October |
| 10.9 | 2021 April |
| 10.10 | 2022 February |
| 11.0 | 2022 August |
| 11.0.1 (not public) | 2022 September |
| 11.0.2 (not public) | 2022 September |
Notes:
dir_releases <- "_Databases/RegulonDB/releases"
## v10.6 is excluded because it does not include TU_ID, only TU names, which are not unique; otherwise it is almost the same as v10.7, which is included.
## Some formatting is applied to make columns such as strand and confidence uniform across versions (upper/lower case, word or +/- symbol for the strand, etc.).
## Starting with v11.0.1 there are two evidence columns; they are merged here so they can be compared with older versions.
dir_versions <- c("10.7", "10.8", "10.9", "10.10", "11.0", "11.0.1", "11.0.2")

## Read BindingSiteSet.tsv for every release, normalise it, and keep each table both in
## `tfbs_sets` (keyed by "v<version>") and as a global `tfbs_set_v<version>`.
tfbs_sets <- list()
for (v in dir_versions) {
  version_tag <- paste0("v", v)
  set <- read.delim(
    paste0(dir_releases, "/", v, "/BindingSiteSet.tsv"),
    comment.char = "#", header = TRUE, stringsAsFactors = FALSE,
    na.strings = c("", "NA")
  ) %>%
    dplyr::mutate(version = version_tag) %>%
    # Accept the strand as a word or as a symbol, in any letter case; anything else becomes NA.
    dplyr::mutate(strand = dplyr::case_when(
      tolower(strand) %in% c("reverse", "-") ~ "-",
      tolower(strand) %in% c("forward", "+") ~ "+",
      TRUE ~ NA_character_
    )) %>%
    dplyr::mutate(confidence = tolower(confidence)) %>%
    # concat_uniq2() is applied one row at a time, and only when the second evidence column exists.
    dplyr::rowwise() %>%
    dplyr::mutate(evidence = if ("evidence_function" %in% colnames(.)) {
      concat_uniq2(evidence, evidence_function)
    } else {
      evidence
    }) %>%
    # Drop the row-wise grouping so it does not leak into every downstream table.
    dplyr::ungroup() %>%
    dplyr::mutate(coords = paste0(start, "_", stop))
  assign(paste0("tfbs_set_", version_tag), set)
  tfbs_sets[[version_tag]] <- set
}
## Version tags in release order, used later as factor levels and set names.
tfbs_versions <- names(tfbs_sets)
## Stack all releases into one table and turn version, effect and confidence into ordered factors.
## NOTE(review): values outside the listed levels become NA; confirm that no release uses
## another effect code (e.g. a dual "+-") that should be kept.
all_tfbs <- bind_rows(tfbs_sets) %>%
  dplyr::mutate(version = factor(version, levels = tfbs_versions)) %>%
  dplyr::mutate(effect = factor(effect, levels = c("+", "-", "?"))) %>%
  dplyr::mutate(confidence = factor(confidence, levels = c("weak", "strong", "confirmed")))

## One row per evidence item: split the comma-separated evidence list, strip the square
## brackets, then split each item on "|" into code, level and name.
all_tfbs_by_evidence <- all_tfbs %>%
  tidyr::separate_rows(evidence, sep = ",") %>%
  dplyr::mutate(evidence = gsub("\\[|\\]", "", evidence)) %>%
  tidyr::separate(evidence, c("evidence_code", "evidence_level", "evidence_name"), sep = "\\|")

## Number of rows per release.
tfbs_summary <- all_tfbs %>%
  dplyr::group_by(version) %>%
  dplyr::summarise(total = n()) %>%
  dplyr::arrange(version)
## Bar chart of the number of rows per release (simple_bar() is a helper defined elsewhere).
TFBS_num <- simple_bar(tfbs_summary, "version", "total") +
  scale_fill_viridis(discrete = TRUE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14)
  ) +
  labs(x = "Release version", y = "Number of TFBSs", title = "")
TFBS_num
## The same counts as a table.
DT::datatable(tfbs_summary, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))
Comments:
## For each release, count how many TFBS IDs occur once, twice, three times, ...
## The grouping is dropped before building the factor so that its levels are computed
## once over all releases (numerically sorted) instead of being assembled group by group.
tfbs_ids_dupli <- all_tfbs %>%
  dplyr::group_by(version, TFBS_ID) %>%
  dplyr::summarise(occurrences = n(), .groups = "drop") %>%
  dplyr::group_by(version, occurrences) %>%
  dplyr::summarise(tfbs_number = n(), .groups = "drop") %>%
  dplyr::mutate(occurrences = factor(occurrences))
### Dodged bars: number of TFBS IDs per occurrence count, one cluster per release
dodge <- ggplot(
  tfbs_ids_dupli,
  aes(x = version, y = tfbs_number, fill = occurrences)
) +
  geom_bar(stat = "identity", position = position_dodge(preserve = "single")) +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14)
  ) +
  labs(
    x = "",
    y = "Number of unique TFBS IDs",
    title = "TFBS ID duplication in BindingSiteSet.txt across versions"
  )
### Same chart without the occurrence counts 1 to 4, and without a legend
dodge2 <- ggplot(
  dplyr::filter(tfbs_ids_dupli, !occurrences %in% c("1", "2", "3", "4")),
  aes(x = version, y = tfbs_number, fill = occurrences)
) +
  geom_bar(stat = "identity", position = position_dodge(preserve = "single")) +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of unique TFBS IDs", title = "...minimum 5 copies")
### Combine both charts with the `/` operator
dodge / dodge2
## TFBS IDs that have at least 10 entries in a given version
## TFBS IDs with at least 10 rows in some release, one column per release.
tfbs_ids_dupli_max <- all_tfbs %>%
  dplyr::group_by(version, TFBS_ID) %>%
  dplyr::summarise(occurrences = n()) %>%
  dplyr::filter(occurrences >= 10) %>%
  dplyr::arrange(desc(occurrences)) %>%
  pivot_wider(
    names_from = version,
    values_from = c(occurrences)
  )
DT::datatable(tfbs_ids_dupli_max, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))
## Container for the unique coordinate strings of each release (filled by the loop that follows).
coordinates <- list()
## Unique "start_stop" coordinate strings per release, read from the list built at load time.
for (v in tfbs_versions) {
  coordinates[[v]] <- unique(tfbs_sets[[v]]$coords)
}
## UpSet plot of coordinate sharing between releases, with two intersections highlighted.
UpSetR::upset(fromList(coordinates), sets = tfbs_versions, order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2),
  queries = list(
    list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.7", "v10.8"), color = "red", active = TRUE)
  )
)
## For each release, count how many coordinate strings occur once, twice, ...
## The grouping is dropped before building the factor so that its levels are computed
## once over all releases (numerically sorted) instead of being assembled group by group.
coords_dupli <- all_tfbs %>%
  dplyr::group_by(version, coords) %>%
  dplyr::summarise(occurrences = n(), .groups = "drop") %>%
  dplyr::group_by(version, occurrences) %>%
  dplyr::summarise(coords_number = n(), .groups = "drop") %>%
  dplyr::mutate(occurrences = factor(occurrences))
### Dodged bars: number of coordinate strings per occurrence count, one cluster per release.
### Labels describe coordinates (they previously repeated the TFBS ID wording).
dodge <- ggplot(coords_dupli, aes(fill = occurrences, y = coords_number, x = version)) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14)
  ) +
  labs(
    x = "",
    y = "Number of unique coordinates",
    title = "Coordinate duplication in BindingSiteSet.txt across versions"
  )
### Same chart without the occurrence counts 1 to 4, and without a legend
dodge2 <- ggplot(
  coords_dupli %>% dplyr::filter(!occurrences %in% c("1", "2", "3", "4")),
  aes(fill = occurrences, y = coords_number, x = version)
) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of unique coordinates", title = "...minimum 5 copies")
dodge / dodge2
## coords that have at least 10 entries in a given version
## Coordinate strings with at least 10 rows in some release, one column per release.
coords_dupli_max <- all_tfbs %>%
  dplyr::group_by(version, coords) %>%
  dplyr::summarise(occurrences = n()) %>%
  dplyr::filter(occurrences >= 10) %>%
  dplyr::arrange(desc(occurrences)) %>%
  pivot_wider(
    names_from = version,
    values_from = c(occurrences)
  )
DT::datatable(coords_dupli_max, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))

## Ridge plot of the distance_TSS column per release, x axis limited to [-1000, 1000].
ggplot(all_tfbs, aes(x = distance_TSS, y = version, fill = version)) +
  ggridges::geom_density_ridges(color = "white") +
  ggridges::theme_ridges() +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme(legend.position = "none") +
  xlim(-1000, 1000)

## Ridge plot of the distance_gene column per release, x axis limited to [-1000, 1000].
ggplot(all_tfbs, aes(x = distance_gene, y = version, fill = version)) +
  ggridges::geom_density_ridges(color = "white") +
  ggridges::theme_ridges() +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme(legend.position = "none") +
  xlim(-1000, 1000)

## Number of rows per release and effect.
tfbs_effect_long <- all_tfbs %>%
  group_by(version, effect) %>%
  summarise(value = n())
## Three views of the per-release effect counts: dodged bars, stacked bars and lines.
dodge <- ggplot(
  tfbs_effect_long,
  aes(x = version, y = value, fill = effect)
) +
  geom_bar(stat = "identity", position = position_dodge(preserve = "single")) +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "", y = "Number of TFBS", title = "")
stack <- ggplot(
  tfbs_effect_long,
  aes(x = version, y = value, fill = effect)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of TFBS", title = "")
line <- ggplot(
  tfbs_effect_long,
  aes(x = version, y = value, group = effect)
) +
  geom_line(aes(color = effect)) +
  geom_point(aes(color = effect), size = 2) +
  scale_color_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "Release version", y = "Number of TFBS", title = "")
## Convert each chart with ggplotly() so it becomes interactive
# fig0 <- ggplotly(TFBS_num)
fig1 <- ggplotly(dodge)
fig2 <- ggplotly(stack)
fig3 <- ggplotly(line)
## Show the three interactive effect charts stacked vertically.
subplot(fig1, fig2, fig3, nrows = 3)

## Effect counts as a table: one row per effect, one column per release.
effect_summary <- all_tfbs %>%
  group_by(version, effect) %>%
  summarise(value = n()) %>%
  data.frame() %>%
  pivot_wider(names_from = version, values_from = c(value)) %>%
  arrange(effect)
DT::datatable(effect_summary, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))

## Number of evidence items per release, evidence code and evidence name.
tfbs_evidence_long <- all_tfbs_by_evidence %>%
  group_by(version, evidence_code, evidence_name) %>%
  summarise(value = n())
## One colour per distinct evidence code (random_palette() is a helper defined elsewhere).
evidence_palette <- random_palette(length(unique(all_tfbs_by_evidence$evidence_code)))
##----
## Three views of the per-release evidence counts: dodged bars, stacked bars and lines.
dodge <- ggplot(
  tfbs_evidence_long,
  aes(x = version, y = value, fill = evidence_code, group = evidence_name)
) +
  geom_bar(stat = "identity", position = position_dodge(preserve = "single")) +
  scale_fill_manual(values = evidence_palette, drop = FALSE, na.value = "gray") +
  # scale_fill_viridis(discrete = T, na.value = "gray", drop = F) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "", y = "Number of TFBSs", title = "")
stack <- ggplot(
  tfbs_evidence_long,
  aes(x = version, y = value, fill = evidence_code, group = evidence_name)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(values = evidence_palette, drop = FALSE, na.value = "gray") +
  # scale_fill_viridis(discrete = T, na.value = "gray", drop = F) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of evidence", title = "")
line <- ggplot(
  tfbs_evidence_long,
  aes(x = version, y = value, group = evidence_name)
) +
  geom_line(aes(color = evidence_code)) +
  scale_color_manual(values = evidence_palette, drop = FALSE, na.value = "gray") +
  geom_point(aes(color = evidence_code), size = 2) +
  # scale_fill_viridis(discrete = T, na.value = "gray", drop = F) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "Release version", y = "Number of TFBS", title = "")
## Interactive versions, including the overall row-count chart as fig0
fig0 <- ggplotly(TFBS_num)
fig1 <- ggplotly(dodge)
fig2 <- ggplotly(stack)
fig3 <- ggplotly(line)
## Show the four interactive evidence charts stacked vertically.
subplot(fig0, fig1, fig2, fig3, nrows = 4)

## For every evidence code/name, the releases it appears in (concat_uniq() is a helper defined elsewhere).
tfbs_evidence_table <- all_tfbs_by_evidence %>%
  group_by(evidence_code, evidence_name) %>%
  summarise(version = concat_uniq(version))
DT::datatable(tfbs_evidence_table, rownames = FALSE, options = list(searching = TRUE, lengthChange = FALSE, pageLength = 10))

## Unique evidence codes per release, then an UpSet plot of how they are shared.
tfbs_evidence_shared <- list()
for (v in tfbs_versions) {
  tfbs_evidence_shared[[v]] <- unique((all_tfbs_by_evidence %>% dplyr::filter(version == v))$evidence_code)
}
## The trailing comma left behind by the disabled `queries` argument has been removed.
UpSetR::upset(fromList(tfbs_evidence_shared), sets = tfbs_versions, order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2)
  # queries = list(list(query = intersects, params = list("v10.7"), color = "red", active = T),
  # list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = T))
)

## Number of rows per release and confidence level.
tfbs_confidence_long <- all_tfbs %>%
  group_by(version, confidence) %>%
  summarise(value = n())
## Three views of the per-release confidence counts: dodged bars, stacked bars and lines.
dodge <- ggplot(
  tfbs_confidence_long,
  aes(x = version, y = value, fill = confidence)
) +
  geom_bar(stat = "identity", position = position_dodge(preserve = "single")) +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "", y = "Number of TFBS", title = "")
stack <- ggplot(
  tfbs_confidence_long,
  aes(x = version, y = value, fill = confidence)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of TFBS", title = "")
line <- ggplot(
  tfbs_confidence_long,
  aes(x = version, y = value, group = confidence)
) +
  geom_line(aes(color = confidence)) +
  geom_point(aes(color = confidence), size = 2) +
  scale_color_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "Release version", y = "Number of TFBS", title = "")
## Convert each chart with ggplotly() so it becomes interactive
# fig0 <- ggplotly(TFBS_num)
fig1 <- ggplotly(dodge)
fig2 <- ggplotly(stack)
fig3 <- ggplotly(line)
## Show the three interactive confidence charts stacked vertically.
subplot(fig1, fig2, fig3, nrows = 3)

## Confidence counts as a table: one row per level (missing shown as "null"),
## one column per release, plus a final "total" row.
confidence_summary <- all_tfbs %>%
  group_by(version, confidence) %>%
  summarise(value = n()) %>%
  mutate(confidence = ifelse(is.na(confidence), "null", as.character(confidence))) %>%
  data.frame() %>%
  pivot_wider(names_from = version, values_from = c(value)) %>%
  # Release/level combinations that never occur are shown as 0.
  mutate(across(starts_with("v"), ~ replace_na(., 0))) %>%
  bind_rows(summarise(
    .,
    across(where(is.numeric), sum),
    across(where(is.character), ~"total")
  )) %>%
  mutate(confidence = factor(confidence, levels = c("weak", "strong", "confirmed", "null", "total"))) %>%
  arrange(confidence)
DT::datatable(confidence_summary, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))
Comments:
## Unique RI IDs per release, read from the list built at load time.
ri_ids <- list()
for (v in tfbs_versions) {
  ri_ids[[v]] <- unique(tfbs_sets[[v]]$RI_ID)
}
## UpSet plot of RI ID sharing between releases, with selected intersections highlighted.
UpSetR::upset(fromList(ri_ids), sets = tfbs_versions, order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2),
  queries = list(
    list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.7", "v10.8", "v10.9", "v10.10"), color = "red", active = TRUE),
    list(query = intersects, params = list("v10.7", "v10.8"), color = "red", active = TRUE)
  )
)
## For each release, count how many RI IDs occur once, twice, ...
## The grouping is dropped before building the factor so that its levels are computed
## once over all releases (numerically sorted) instead of being assembled group by group.
ris_ids_dupli <- all_tfbs %>%
  dplyr::group_by(version, RI_ID) %>%
  dplyr::summarise(occurrences = n(), .groups = "drop") %>%
  dplyr::group_by(version, occurrences) %>%
  dplyr::summarise(ris_number = n(), .groups = "drop") %>%
  dplyr::mutate(occurrences = factor(occurrences))
### Dodged bars: number of RI IDs per occurrence count, one cluster per release
dodge <- ggplot(ris_ids_dupli, aes(fill = occurrences, y = ris_number, x = version)) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14)
  ) +
  labs(x = "", y = "Number of unique RI IDs", title = "RI ID duplication in BindingSiteSet.txt across versions")
### Same chart without the IDs that occur exactly once, and without a legend
dodge2 <- ggplot(
  ris_ids_dupli %>% dplyr::filter(!occurrences %in% c("1")),
  aes(fill = occurrences, y = ris_number, x = version)
) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of unique RI IDs", title = "...minimum 2 copies")
dodge / dodge2
## Container for the unique TF IDs of each release (filled by the loop that follows).
tf_ids <- list()
## Unique TF IDs per release, read from the list built at load time.
for (v in tfbs_versions) {
  tf_ids[[v]] <- unique(tfbs_sets[[v]]$TF_ID)
}
## UpSet plot of TF ID sharing between releases, with selected intersections highlighted.
UpSetR::upset(fromList(tf_ids), sets = tfbs_versions, order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2),
  sets.x.label = "Number of unique TF IDs",
  queries = list(
    list(query = intersects, params = list("v10.8", "v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.7", "v10.8", "v10.9"), color = "red", active = TRUE),
    list(query = intersects, params = list("v10.7"), color = "red", active = TRUE)
  )
)
## TF IDs present in v10.7 but absent from v11.0.2. Taken directly from the two sets,
## so the result holds the IDs themselves and does not depend on fromList() row names.
tf_ids_gone <- setdiff(tf_ids[["v10.7"]], tf_ids[["v11.0.2"]])
## Unique TF names per release, read from the list built at load time.
tf_names <- list()
for (v in tfbs_versions) {
  tf_names[[v]] <- unique(tfbs_sets[[v]]$TF_name)
}
## UpSet plot of TF name sharing between releases, with selected intersections highlighted.
UpSetR::upset(fromList(tf_names), sets = tfbs_versions, order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2),
  sets.x.label = "Number of unique TF names",
  queries = list(
    list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.7", "v10.8", "v10.9"), color = "red", active = TRUE)
  )
)
## TF names present in v10.7 but absent from v11.0.2. Taken directly from the two sets,
## so the result holds the names themselves and does not depend on fromList() row names.
tf_names_gone <- setdiff(tf_names[["v10.7"]], tf_names[["v11.0.2"]])
## Container for the unique promoter names of each release (filled by the loop that follows).
promoter_name <- list()
## Unique promoter names per release, read from the list built at load time.
for (v in tfbs_versions) {
  promoter_name[[v]] <- unique(tfbs_sets[[v]]$promoter)
}
## UpSet plot of promoter name sharing between releases, with selected intersections highlighted.
UpSetR::upset(fromList(promoter_name), sets = tfbs_versions, order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2),
  sets.x.label = "Number of unique promoter names",
  queries = list(
    list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.7", "v10.8", "v10.9"), color = "red", active = TRUE)
  )
)
## Container for the unique TU IDs of each release (filled by the loop that follows).
tu_ids <- list()
## Unique TU IDs per release, read from the list built at load time.
for (v in tfbs_versions) {
  tu_ids[[v]] <- unique(tfbs_sets[[v]]$TU_ID)
}
## UpSet plot of TU ID sharing between releases, with selected intersections highlighted.
UpSetR::upset(fromList(tu_ids), sets = tfbs_versions, order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2),
  sets.x.label = "Number of unique TU IDs",
  queries = list(
    list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.7", "v10.8", "v10.9"), color = "red", active = TRUE)
  )
)
## Container for the unique TU names of each release (filled by the loop that follows).
tu_names <- list()
## Unique TU names per release, read from the list built at load time.
for (v in tfbs_versions) {
  tu_names[[v]] <- unique(tfbs_sets[[v]]$TU_name)
}
## UpSet plot of TU name sharing between releases, with selected intersections highlighted.
UpSetR::upset(fromList(tu_names), sets = tfbs_versions, order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2),
  sets.x.label = "Number of unique TU names",
  queries = list(
    list(query = intersects, params = list("v10.8", "v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = TRUE),
    list(query = intersects, params = list("v10.7", "v10.8", "v10.9"), color = "red", active = TRUE)
  )
)
Notes:
Many (most?) of the differences seem to be caused by differing promoter names.
Example:
## Full join of versions 10.7 and 11.0.2 on TFBS_ID and promoter name, keeping only the
## columns needed for a side-by-side comparison (suffixes mark the source version)
tfbs_join_107_1102 <- dplyr::full_join(
  tfbs_set_v10.7, tfbs_set_v11.0.2,
  by = c("TFBS_ID", "promoter"),
  suffix = c("_10.7", "_11.0.2")
) %>%
  dplyr::arrange(TFBS_ID) %>%
  dplyr::select(
    TFBS_ID, promoter,
    starts_with("TF_name"), starts_with("TU"), starts_with("coords"),
    starts_with("evidence_1"), starts_with("confidence")
  )
# select(TFBS_ID, promoter, everything())
## "Matches" are the joined rows with no missing value in any kept column; every other row
## counts as a difference.
## NOTE(review): a row present in both versions but with an NA in some column (e.g. a missing
## confidence) is also reported as a difference; confirm this is intended.
tfbs_matches_107_1102 <- na.omit(tfbs_join_107_1102)
tfbs_differences_107_1102 <- dplyr::setdiff(tfbs_join_107_1102, tfbs_matches_107_1102)
## Export both tables as tab-separated files in the working directory
write.table(
  tfbs_join_107_1102,
  file = "TFBS_full_join_107_1102.tsv",
  quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t"
)
write.table(
  tfbs_differences_107_1102,
  file = "TFBS_differences_107_1102.tsv",
  quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t"
)
## display selection of columns
# tfbs_differences_107_1102_simple <- tfbs_differences_107_1102 %>%
# dplyr::select(TFBS_ID, promoter, starts_with("TF_name"), starts_with("TU"), starts_with("coords"), starts_with("evidence_1"), starts_with("confidence"))
DT::datatable(
  tfbs_differences_107_1102,
  rownames = FALSE,
  options = list(
    searching = TRUE, lengthChange = FALSE, pageLength = 5,
    columnDefs = list(list(width = '200px', targets = c(11, 12)))
  )
)
# all_tfbs_RI_TU <- tfbs_set_v10.7 %>%
# dplyr::full_join(tfbs_set_v10.6.3 %>% dplyr::select(TFBS_ID, TF_name, RI_ID, TU_name, evidence, confidence),
# by = c("TFBS_ID", "TF_name", "RI_ID", "TU_name"), suffix = c("_10.6", "_10.6.3")) %>%
# dplyr::full_join(tfbs_set_v10.7 %>% dplyr::select(TFBS_ID, TF_name, RI_ID, TU_name, evidence, confidence),
# by = c("TFBS_ID", "TF_name", "RI_ID", "TU_name"), suffix = c("_10.6.3", "_10.7")) %>%
# dplyr::full_join(tfbs_set_v10.8 %>% dplyr::select(TFBS_ID, TF_name, RI_ID, TU_name, evidence, confidence),
# by = c("TFBS_ID", "TF_name", "RI_ID", "TU_name"), suffix = c("_10.7", "_10.8")) %>%
# dplyr::full_join(tfbs_set_v10.9 %>% dplyr::select(TFBS_ID, TF_name, RI_ID, TU_name, evidence, confidence),
# by = c("TFBS_ID", "TF_name", "RI_ID", "TU_name"), suffix = c("_10.8", "_10.9")) %>%
# dplyr::full_join(tfbs_set_v10.10 %>% dplyr::select(TFBS_ID, TF_name, RI_ID, TU_name, evidence, confidence),
# by = c("TFBS_ID", "TF_name", "RI_ID", "TU_name"), suffix = c("_10.9", "_10.10")) %>%
# dplyr::full_join(tfbs_set_v11.0 %>% dplyr::select(TFBS_ID, TF_name, RI_ID, TU_name, evidence, confidence),
# by = c("TFBS_ID", "TF_name", "RI_ID", "TU_name"), suffix = c("_10.10", "_11.0")) %>%
# dplyr::full_join(tfbs_set_v11.0.1 %>% dplyr::select(TFBS_ID, TF_name, RI_ID, TU_name, evidence, evidence_function, confidence),
# by = c("TFBS_ID", "TF_name", "RI_ID", "TU_name"), suffix = c("_11.0", "_11.0.1")) %>%
# dplyr::full_join(tfbs_set_v11.0.2 %>% dplyr::select(TFBS_ID, TF_name, RI_ID, TU_name, evidence, evidence_function, confidence),
# by = c("TFBS_ID", "TF_name", "RI_ID", "TU_name"), suffix = c("_11.0.1", "_11.0.2")) %>%
# dplyr::arrange(TFBS_ID) %>%
# dplyr::select(-conformation_name, -TF_ID) %>%
# dplyr::rename(confidence_11.0.2 = confidence, evidence_11.0.2 = evidence)

## The two exports below read all_tfbs_RI_TU, whose construction is commented out above,
## so they only run when that table exists in the session.
## NOTE(review): the filters also expect per-version columns (evidence_10.6, confidence_10.8, ...)
## to be present in that table; verify once the join is re-enabled.
if (exists("all_tfbs_RI_TU")) {
  ## Rows with no evidence and no confidence in any release before 10.9.
  tfbs_new_from_10.9 <- all_tfbs_RI_TU %>%
    dplyr::filter(is.na(evidence_10.6) & is.na(confidence_10.6) &
      is.na(evidence_10.6.3) & is.na(confidence_10.6.3) &
      is.na(evidence_10.7) & is.na(confidence_10.7) &
      is.na(evidence_10.8) & is.na(confidence_10.8))
  write.table(tfbs_new_from_10.9, file = "TFBS_new_10.9.tsv", quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")
  # DT::datatable(tfbs_new_from_10.9, rownames= FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 5))

  ## Rows whose 10.8 confidence is "confirmed". The comparison ignores letter case because
  ## the BindingSiteSet confidence column is lower-cased when the files are loaded.
  tfbs_confirmed_10.8 <- all_tfbs_RI_TU %>%
    dplyr::filter(tolower(confidence_10.8) == "confirmed")
  write.table(tfbs_confirmed_10.8, file = "TFBS_confirmed_10.8.tsv", quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")
}
# DT::datatable(tfbs_confirmed_10.8, rownames= FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 5))

## Releases used for the TF-gene network comparison: every loaded release except the ones listed here.
versions_nw <- setdiff(dir_versions, c("10.6", "10.10", "11.0.2"))
## Read network_tf_gene.tsv for each of those releases into a global `network_set_<version>`,
## tagging every row with its version ("v<version>").
for (v in versions_nw) {
  assign(
    paste0("network_set_", v),
    read.delim(
      paste0(dir_releases, "/", v, "/network_tf_gene.tsv"),
      comment.char = "#", header = TRUE, stringsAsFactors = FALSE, na.strings = ""
    ) %>%
      dplyr::mutate(version = paste0("v", v)) # %>%
    # dplyr::mutate(ifelse(effect == ))
  )
}
NB: additional TAB characters at the end of each line cause parsing issues
## Number of rows in each network_tf_gene table, one row per release.
tf_gene_summary <- list()
for (v in versions_nw) {
  tf_gene_summary[[paste0("v", v)]] <- data.frame(version = paste0("v", v), count = nrow(get(paste0("network_set_", v))))
}
tf_gene_summary_df <- data.table::rbindlist(tf_gene_summary)
tf_gene_summary_df$version <- factor(tf_gene_summary_df$version, levels = paste0("v", versions_nw))
DT::datatable(tf_gene_summary_df, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))

## Bar chart of the same counts (simple_bar() is a helper defined elsewhere).
## The y label describes network entries (it previously repeated the TFBS wording).
tf_gene_num <- simple_bar(tf_gene_summary_df, "version", "count") +
  scale_fill_viridis(discrete = TRUE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14)
  ) +
  labs(x = "Release version", y = "Number of entries", title = "")
tf_gene_num
## Container for the unique TF-gene pairs of each release (filled by the loop that follows).
tf_gene_pairs <- list()
## Unique "TF_gene" pair labels per release. paste0() is vectorised, so no row-wise
## grouping is needed to build the label column.
for (v in versions_nw) {
  pairs <- get(paste0("network_set_", v)) %>%
    mutate(TF_gene = paste0(TF_name, "_", gene_name))
  tf_gene_pairs[[paste0("v", v)]] <- unique(pairs$TF_gene)
}
## UpSet plot of TF-gene pair sharing between releases.
## The trailing comma left behind by the disabled `queries` argument has been removed.
UpSetR::upset(fromList(tf_gene_pairs), sets = paste0("v", versions_nw), order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2)
  # queries = list(list(query = intersects, params = list("v10.6", "v10.6.3", "v10.7"), color = "red", active = T),
  # list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = T))
)
## Container for the per-release pair duplication counts (filled by the loop that follows).
pairs_ids_dupli <- list()
## For each release, count how many TF-gene pairs occur once, twice, ...
for (v in versions_nw) {
  tag <- paste0("v", v)
  network_rows <- get(paste0("network_set_", v))
  pair_counts <- network_rows %>%
    mutate(TF_gene = paste0(TF_name, "_", gene_name)) %>%
    group_by(TF_gene) %>%
    summarise(occurrences = n())
  pairs_ids_dupli[[tag]] <- pair_counts %>%
    group_by(occurrences) %>%
    summarise(pairs_number = n()) %>%
    mutate(version = tag)
}
## Stack the per-release tables and turn occurrences and version into factors
pairs_ids_dupli_df <- dplyr::mutate(
  data.table::rbindlist(pairs_ids_dupli),
  occurrences = factor(occurrences)
)
pairs_ids_dupli_df$version <- factor(
  pairs_ids_dupli_df$version,
  levels = paste0("v", versions_nw)
)
### Dodged bars: number of TF-gene pairs per occurrence count, one cluster per release
dodge <- ggplot(pairs_ids_dupli_df, aes(fill = occurrences, y = pairs_number, x = version)) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14)
  ) +
  labs(x = "", y = "Number of pairs", title = "TF-gene pairs duplication in network_tf_gene.txt across versions")
### Same chart without the pairs that occur exactly once, and without a legend
dodge2 <- ggplot(
  pairs_ids_dupli_df %>% dplyr::filter(occurrences != 1),
  aes(fill = occurrences, y = pairs_number, x = version)
) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of pairs", title = "...minimum 2 copies")
dodge / dodge2
NB: evidence is formatted in different ways between versions of downloadable file, which can cause parsing issues
# tf_gene_evidence <- bind_rows(network_set_10.6.3, network_set_10.7, network_set_10.8, network_set_10.9, network_set_11.0) %>%
# tidyr::separate_rows(evidence, sep = ",") %>%
# dplyr::mutate(evidence_code = gsub("\\[|\\]", "", evidence))
#
# last <- network_set_11.0.1 %>%
# dplyr::mutate(evidence_function = gsub("\\[\\]", "\\[\\|\\|\\]", evidence_function)) %>%
# dplyr::mutate(evidence = gsub("\\[\\]", "\\[\\|\\|\\]", evidence)) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(evidence = concat_uniq(evidence, evidence_function)) %>%
# tidyr::separate_rows(evidence, sep = ",") %>%
# dplyr::mutate(evidence = gsub("\\[|\\]", "", evidence)) %>%
# tidyr::separate(evidence, c("evidence_code", "evidence_level", "evidence_name"), sep = "\\|")
# # dplyr::mutate(fevidence = gsub("\\[|\\]", "", evidence_function)) %>%
# # tidyr::separate(fevidence, c("fevidence_code", "fevidence_level", "fevidence_name"), sep = "\\|")
#
# tf_gene_evidence <- bind_rows(tf_gene_evidence, last)
# tf_gene_evidence$version <- factor(tf_gene_evidence$version, levels = paste0("v", versions_nw))
## Releases 10.7 to 11.0: split the comma-separated evidence list into one row per item and
## use the item, stripped of square brackets, as the evidence code
tf_gene_evidence <- tidyr::separate_rows(
  bind_rows(network_set_10.7, network_set_10.8, network_set_10.9, network_set_11.0),
  evidence,
  sep = ","
) %>%
  dplyr::mutate(evidence_code = gsub("\\[|\\]", "", evidence))
## Release 11.0.1: rewrite empty "[]" entries as "[||]", join the two evidence columns with a
## comma, then split each item on "|" into code, level and name
last <- network_set_11.0.1 %>%
  dplyr::mutate(
    evidence_function = gsub("\\[\\]", "\\[\\|\\|\\]", evidence_function),
    evidence = gsub("\\[\\]", "\\[\\|\\|\\]", evidence)
  ) %>%
  dplyr::rowwise() %>%
  dplyr::mutate(evidence = paste0(evidence, ",", evidence_function)) %>%
  tidyr::separate_rows(evidence, sep = ",") %>%
  dplyr::mutate(evidence = gsub("\\[|\\]", "", evidence)) %>%
  tidyr::separate(
    evidence,
    c("evidence_code", "evidence_level", "evidence_name"),
    sep = "\\|"
  )
# dplyr::mutate(fevidence = gsub("\\[|\\]", "", evidence_function)) %>%
# tidyr::separate(fevidence, c("fevidence_code", "fevidence_level", "fevidence_name"), sep = "\\|")
## Combine both parts, trim whitespace around the codes and order the releases
tf_gene_evidence <- bind_rows(tf_gene_evidence, last) %>%
  dplyr::mutate(evidence_code = trimws(evidence_code))
tf_gene_evidence$version <- factor(
  tf_gene_evidence$version,
  levels = paste0("v", versions_nw)
)
#
## Number of evidence items per release and evidence code
tf_gene_evidence_long <- summarise(
  group_by(tf_gene_evidence, version, evidence_code),
  value = n()
)
## Three views of the per-release network evidence counts: dodged bars, stacked bars and lines.
dodge <- ggplot(tf_gene_evidence_long, aes(fill = evidence_code, y = value, x = version, group = evidence_code)) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "", y = "Number of entries", title = "")
stack <- ggplot(tf_gene_evidence_long, aes(fill = evidence_code, y = value, x = version, group = evidence_code)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of evidence", title = "")
line <- ggplot(tf_gene_evidence_long, aes(y = value, x = version, group = evidence_code)) +
  geom_line(aes(color = evidence_code)) +
  geom_point(size = 2, aes(color = evidence_code)) +
  scale_color_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "Release version", y = "Number of entries", title = "")
## Interactive versions, stacked vertically with a shared y axis
fig0 <- ggplotly(tf_gene_num)
fig1 <- ggplotly(dodge)
fig2 <- ggplotly(stack)
fig3 <- ggplotly(line)
subplot(fig0, fig1, fig2, fig3, nrows = 4, shareY = TRUE)
NB: confidence is now written in the downloadable file in all caps, which can cause mapping issues
## Number of network entries per release and confidence level.
## Confidence is lower-cased BEFORE counting so that spellings differing only in letter
## case are pooled into one row instead of producing duplicate version/confidence rows.
confidence_nw_all_versions <- list()
for (v in versions_nw) {
  confidence_nw_all_versions[[v]] <- get(paste0("network_set_", v)) %>%
    dplyr::mutate(confidence = tolower(confidence)) %>%
    group_by(version, confidence) %>%
    summarise(value = n(), .groups = "drop")
}
confidence_nw_all_versions_df <- data.table::rbindlist(confidence_nw_all_versions)
## Values outside the three listed levels (including missing ones) become NA.
confidence_nw_all_versions_df$confidence <- factor(confidence_nw_all_versions_df$confidence, levels = c("weak", "strong", "confirmed"))
confidence_nw_all_versions_df$version <- factor(confidence_nw_all_versions_df$version, levels = paste0("v", versions_nw))
##
tf_gene_confidence_long <- confidence_nw_all_versions_df
## Three views of the per-release network confidence counts: dodged bars, stacked bars and lines.
dodge <- ggplot(tf_gene_confidence_long, aes(fill = confidence, y = value, x = version)) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "", y = "Number of entries", title = "")
## The y label now matches the other two network charts (it previously said "Number of TFBS").
stack <- ggplot(tf_gene_confidence_long, aes(fill = confidence, y = value, x = version)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of entries", title = "")
line <- ggplot(tf_gene_confidence_long, aes(group = confidence, y = value, x = version)) +
  geom_line(aes(color = confidence)) +
  geom_point(size = 2, aes(color = confidence)) +
  scale_color_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "Release version", y = "Number of entries", title = "")
## Interactive versions, including the overall entry-count chart as fig0
fig0 <- ggplotly(tf_gene_num)
fig1 <- ggplotly(dodge)
fig2 <- ggplotly(stack)
fig3 <- ggplotly(line)
## Display the four plotly panels in four rows.
subplot(fig0, fig1, fig2, fig3, nrows = 4, shareY = TRUE)

## Wide summary table: one row per confidence value, one column per release,
## followed by a "total" row holding the column sums.
confidence_nw_summary <- confidence_nw_all_versions_df %>%
  arrange(confidence) %>%
  # NA confidence is displayed as the text "null"; the factor becomes character
  mutate(confidence = ifelse(is.na(confidence), "null", as.character(confidence))) %>%
  # mutate(value = ifelse(is.na(value), 0, value)) %>%
  data.frame() %>%
  pivot_wider(names_from = version, values_from = c(value)) %>%
  # a release without a given confidence value gets 0 rather than NA
  mutate(across(starts_with("v"), ~ replace_na(., 0))) %>%
  bind_rows(summarise(
    .,
    across(where(is.numeric), sum),
    across(where(is.character), ~"total")
  ))
## Render the confidence summary as an interactive table.
DT::datatable(confidence_nw_summary, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))

## Merge the TF-gene tables of all releases on (TF_name, gene_name, effect),
## keeping the evidence and confidence columns of each release side by side.
##
## NOTE: `suffix` is only applied to non-key columns that exist on BOTH sides
## of a join. Assuming network_set_10.6.3 has `evidence` and `confidence`
## columns, the first join renames them for 10.6.3 and 10.7; the 10.8 columns
## then arrive without a clash and stay unsuffixed until the 10.9 join renames
## them, and likewise for 11.0 / 11.0.1. The column labels therefore depend on
## the releases being joined in pairs - re-check them when adding or removing
## a release.
all_tf_gene <- network_set_10.6.3 %>%
  dplyr::full_join(
    network_set_10.7 %>% dplyr::select(TF_name, gene_name, effect, evidence, confidence),
    by = c("TF_name", "gene_name", "effect"), suffix = c("_10.6.3", "_10.7")
  ) %>%
  dplyr::full_join(
    network_set_10.8 %>% dplyr::select(TF_name, gene_name, effect, evidence, confidence),
    by = c("TF_name", "gene_name", "effect"), suffix = c("_10.7", "_10.8")
  ) %>%
  dplyr::full_join(
    network_set_10.9 %>% dplyr::select(TF_name, gene_name, effect, evidence, confidence),
    by = c("TF_name", "gene_name", "effect"), suffix = c("_10.8", "_10.9")
  ) %>%
  dplyr::full_join(
    network_set_11.0 %>% dplyr::select(TF_name, gene_name, effect, evidence, confidence),
    by = c("TF_name", "gene_name", "effect"), suffix = c("_10.9", "_11.0")
  ) %>%
  dplyr::full_join(
    network_set_11.0.1 %>% dplyr::select(TF_name, gene_name, effect, evidence, evidence_function, confidence),
    by = c("TF_name", "gene_name", "effect"), suffix = c("_11.0", "_11.0.1")
  ) %>%
  dplyr::arrange(TF_name, gene_name)
# distinct()
# dplyr::rename(confidence_11.0.2 = confidence, evidence_11.0.2 = evidence)
## Write the merged TF-gene table to a tab-separated file in the working directory.
write.table(all_tf_gene, file = "all_tf_gene.tsv", quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")

## Releases used for the TF-TU network comparison: every entry of dir_versions
## except "10.10" and "11.0.2".
versions_nw <- setdiff(dir_versions, c("10.10", "11.0.2"))

## Read each release's network_tf_tu.tsv into a global named
## network_tu_set_<release>, tagging every row with "v<release>".
## Lines starting with "#" are skipped and empty fields are read as NA.
for (v in versions_nw) {
  assign(
    paste0("network_tu_set_", v),
    read.delim(
      paste0(dir_releases, "/", v, "/network_tf_tu.tsv"),
      comment.char = "#", header = TRUE, stringsAsFactors = FALSE, na.strings = ""
    ) %>%
      dplyr::mutate(version = paste0("v", v))
  )
}

## NB: additional TAB characters at the end of each line cause parsing issues
## Number of rows in the TF-TU network table of each release.
tf_tu_summary <- list()
for (v in versions_nw) {
  tf_tu_summary[[paste0("v", v)]] <- data.frame(
    version = paste0("v", v),
    count = nrow(get(paste0("network_tu_set_", v)))
  )
}
tf_tu_summary_df <- data.table::rbindlist(tf_tu_summary)
## Keep the releases in versions_nw order. The factor must be stored on
## tf_tu_summary_df itself; it was previously written into tf_gene_summary_df,
## which left this table unordered and overwrote the TF-gene one.
tf_tu_summary_df$version <- factor(tf_tu_summary_df$version, levels = paste0("v", versions_nw))
## Interactive table of the per-release TF-TU row counts.
DT::datatable(tf_tu_summary_df, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))

## Bar chart of the same counts, built with the file's simple_bar() helper.
tf_tu_num <- simple_bar(tf_tu_summary_df, "version", "count") +
  scale_fill_viridis(discrete = TRUE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14)
  ) +
  labs(x = "Release version", y = "Number of entries", title = "")
## Print the TF-TU count bar chart.
tf_tu_num

## Distinct "<TF_name>_<TU_name>" labels found in each release, keyed by
## "v<release>".
tf_tu_pairs <- list()
for (v in versions_nw) {
  # paste0() is vectorised, so the labels are built for the whole column at
  # once; no rowwise() is required.
  pairs <- get(paste0("network_tu_set_", v)) %>%
    mutate(TF_TU = paste0(TF_name, "_", TU_name))
  tf_tu_pairs[[paste0("v", v)]] <- unique(pairs$TF_TU)
}
## UpSet plot of the TF-TU pair sets: intersections are ordered by size and the
## sets are kept in versions_nw order.
UpSetR::upset(
  fromList(tf_tu_pairs),
  sets = paste0("v", versions_nw), order.by = "freq", keep.order = TRUE,
  text.scale = c(2, 2, 2, 2, 2, 2)
  # Add a comma after text.scale when re-enabling the queries below.
  # queries = list(list(query = intersects, params = list("v10.6", "v10.6.3", "v10.7"), color = "red", active = T),
  #                list(query = intersects, params = list("v10.9", "v10.10", "v11.0", "v11.0.1", "v11.0.2"), color = "blue", active = T))
)

## Per-release duplication counts are collected in this list.
pairs_tu_ids_dupli <- list()
## For each release: count how often every "<TF_name>_<TU_name>" label occurs,
## then count how many labels share each occurrence number.
for (v in versions_nw) {
  pairs_tu_ids_dupli[[paste0("v", v)]] <- get(paste0("network_tu_set_", v)) %>%
    # paste0() is vectorised; rowwise() is not needed before group_by()
    mutate(TF_TU = paste0(TF_name, "_", TU_name)) %>%
    group_by(TF_TU) %>%
    summarise(occurrences = n()) %>%
    group_by(occurrences) %>%
    summarise(pairs_number = n()) %>%
    mutate(version = paste0("v", v))
}
pairs_tu_ids_dupli_df <- data.table::rbindlist(pairs_tu_ids_dupli) %>%
  dplyr::mutate(occurrences = factor(occurrences))
pairs_tu_ids_dupli_df$version <- factor(pairs_tu_ids_dupli_df$version, levels = paste0("v", versions_nw))
###
## Dodged bars of the number of pairs per occurrence count and release.
## The title names the TF-TU network file, which is where these tables come from.
dodge <- ggplot(pairs_tu_ids_dupli_df, aes(fill = occurrences, y = pairs_number, x = version)) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14)
  ) +
  labs(x = "", y = "Number of pairs", title = "TF-TU pairs duplication in network_tf_tu.txt across versions")
dodge

## NB: evidence is formatted in different ways between versions of downloadable file, which can cause parsing issues
## Releases 10.7 to 11.0: one row per comma-separated evidence entry, with the
## square brackets stripped to obtain the evidence code.
tf_tu_evidence <- dplyr::mutate(
  tidyr::separate_rows(
    bind_rows(network_tu_set_10.7, network_tu_set_10.8, network_tu_set_10.9, network_tu_set_11.0),
    evidence,
    sep = ","
  ),
  evidence_code = gsub("\\[|\\]", "", evidence)
)

## Release 11.0.1 has two evidence columns with "|"-delimited entries. Empty
## "[]" entries are rewritten first (intended result: "[||]", so they still
## split into three fields), then both columns are concatenated and split
## into code / level / name.
last <- network_tu_set_11.0.1 %>%
  dplyr::mutate(
    evidence_function = gsub("\\[\\]", "\\[\\|\\|\\]", evidence_function),
    evidence = gsub("\\[\\]", "\\[\\|\\|\\]", evidence)
  ) %>%
  dplyr::rowwise() %>%
  dplyr::mutate(evidence = paste0(evidence, ",", evidence_function)) %>%
  tidyr::separate_rows(evidence, sep = ",") %>%
  dplyr::mutate(evidence = gsub("\\[|\\]", "", evidence)) %>%
  tidyr::separate(evidence, c("evidence_code", "evidence_level", "evidence_name"), sep = "\\|")
# dplyr::mutate(fevidence = gsub("\\[|\\]", "", evidence_function)) %>%
# tidyr::separate(fevidence, c("fevidence_code", "fevidence_level", "fevidence_name"), sep = "\\|")

## Combine all releases, trim whitespace around the codes and order the
## releases as in versions_nw.
tf_tu_evidence <- dplyr::mutate(
  bind_rows(tf_tu_evidence, last),
  evidence_code = trimws(evidence_code)
)
tf_tu_evidence <- dplyr::mutate(
  tf_tu_evidence,
  version = factor(version, levels = paste0("v", versions_nw))
)
##
## Number of rows per release and evidence code.
tf_tu_evidence_long <- summarise(
  group_by(tf_tu_evidence, version, evidence_code),
  value = n()
)
## Three views of tf_tu_evidence_long (columns used: version, evidence_code, value).

## Dodged bars, one bar per evidence code; the only panel with a legend.
dodge <- ggplot(tf_tu_evidence_long, aes(fill = evidence_code, y = value, x = version, group = evidence_code)) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "", y = "Number of entries", title = "")

## Stacked bars of the same counts.
stack <- ggplot(tf_tu_evidence_long, aes(fill = evidence_code, y = value, x = version, group = evidence_code)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of evidence", title = "")

## One line per evidence code across releases.
line <- ggplot(tf_tu_evidence_long, aes(y = value, x = version, group = evidence_code)) +
  geom_line(aes(color = evidence_code)) +
  geom_point(size = 2, aes(color = evidence_code)) +
  scale_color_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "Release version", y = "Number of entries", title = "")

## Convert to plotly and lay the four panels out in four rows.
fig0 <- ggplotly(tf_tu_num)
fig1 <- ggplotly(dodge)
fig2 <- ggplotly(stack)
fig3 <- ggplotly(line)
subplot(fig0, fig1, fig2, fig3, nrows = 4, shareY = TRUE)

## NB: confidence is now written in the downloadable file in all caps, which can cause mapping issues
## Confidence counts for the TF-TU network.
## This section was a verbatim copy of the TF-gene one and still read
## network_set_<release> and tf_gene_num; it now uses the TF-TU tables and the
## TF-TU count plot. Variable names are kept so later statements still work.
## NOTE(review): assumes network_tu_set_<release> has a `confidence` column
## like the TF-gene tables - confirm against the network_tf_tu.tsv header.
confidence_nw_all_versions <- list()
for (v in versions_nw) {
  confidence_nw_all_versions[[v]] <- get(paste0("network_tu_set_", v)) %>%
    group_by(version, confidence) %>%
    summarise(value = n())
}
## Lower-case the confidence values before the factor conversion; anything
## other than weak/strong/confirmed becomes NA.
confidence_nw_all_versions_df <- data.table::rbindlist(confidence_nw_all_versions) %>%
  dplyr::mutate(confidence = tolower(confidence))
confidence_nw_all_versions_df$confidence <- factor(confidence_nw_all_versions_df$confidence, levels = c("weak", "strong", "confirmed"))
confidence_nw_all_versions_df$version <- factor(confidence_nw_all_versions_df$version, levels = paste0("v", versions_nw))
##
tf_gene_confidence_long <- confidence_nw_all_versions_df

## Dodged bars; the only panel with a legend.
dodge <- ggplot(tf_gene_confidence_long, aes(fill = confidence, y = value, x = version)) +
  geom_bar(position = position_dodge(preserve = "single"), stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "right"
  ) +
  labs(x = "", y = "Number of entries", title = "")

## Stacked bars. The y label used to read "Number of TFBS", but the values are
## counts of network entries, as in the two sibling panels.
stack <- ggplot(tf_gene_confidence_long, aes(fill = confidence, y = value, x = version)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_fill_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "", y = "Number of entries", title = "")

## One line per confidence level across releases.
line <- ggplot(tf_gene_confidence_long, aes(group = confidence, y = value, x = version)) +
  geom_line(aes(color = confidence)) +
  geom_point(size = 2, aes(color = confidence)) +
  scale_color_viridis(discrete = TRUE, na.value = "gray", drop = FALSE) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    title = element_text(size = 14),
    legend.position = "none"
  ) +
  labs(x = "Release version", y = "Number of entries", title = "")

## plotly versions; the top panel is the TF-TU count plot.
fig0 <- ggplotly(tf_tu_num)
fig1 <- ggplotly(dodge)
fig2 <- ggplotly(stack)
fig3 <- ggplotly(line)
## Display the four plotly panels in four rows.
subplot(fig0, fig1, fig2, fig3, nrows = 4, shareY = TRUE)

## Wide summary table: one row per confidence value, one column per release,
## followed by a "total" row holding the column sums.
confidence_nw_summary <- confidence_nw_all_versions_df %>%
  arrange(confidence) %>%
  # NA confidence is displayed as the text "null"; the factor becomes character
  mutate(confidence = ifelse(is.na(confidence), "null", as.character(confidence))) %>%
  # mutate(value = ifelse(is.na(value), 0, value)) %>%
  data.frame() %>%
  pivot_wider(names_from = version, values_from = c(value)) %>%
  # a release without a given confidence value gets 0 rather than NA
  mutate(across(starts_with("v"), ~ replace_na(., 0))) %>%
  bind_rows(summarise(
    .,
    across(where(is.numeric), sum),
    across(where(is.character), ~"total")
  ))

## Render the summary as an interactive table.
DT::datatable(confidence_nw_summary, rownames = FALSE, options = list(searching = FALSE, lengthChange = FALSE, pageLength = 10))

## Save the whole workspace so the report objects can be reloaded later.
save.image(file = "Binding_dataset_report.Rdata")